{
 "cells": [
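  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Evaluators\n",
    "\n",
    "This notebook tests the ergonomics of `LLMEvaluator` and `run_evals`. It builds a relevance evaluator and a hallucination evaluator, then runs both over a two-row example dataframe under each combination of `provide_explanation` and `use_function_calling_if_available`."
   ]
  },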
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import nest_asyncio\n",
    "import pandas as pd\n",
    "from phoenix.experimental.evals import (\n",
    "    EvalCriteria,\n",
    "    LLMEvaluator,\n",
    "    OpenAIModel,\n",
    ")\n",
    "from phoenix.experimental.evals.functions.classify import run_evals\n",
    "\n",
    "# Patch asyncio so event loops can be nested: the notebook kernel already runs one,\n",
    "# and run_evals presumably drives its own async work on top of it (TODO confirm).\n",
    "nest_asyncio.apply()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Name the model by keyword: a bare positional argument binds to whichever field\n",
    "# OpenAIModel declares first, which is not guaranteed to be the model name.\n",
    "# NOTE(review): confirm `model_name` is the field name in the installed Phoenix version.\n",
    "model = OpenAIModel(model_name=\"gpt-4\")\n",
    "\n",
    "# One evaluator per criterion; both are built from the same model instance.\n",
    "relevance_evaluator = LLMEvaluator.from_criteria(EvalCriteria.RELEVANCE, model)\n",
    "hallucination_evaluator = LLMEvaluator.from_criteria(EvalCriteria.HALLUCINATION, model)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dataframe = pd.DataFrame(\n",
    "    [\n",
    "        {\n",
    "            \"input\": \"What is the capital of California?\",\n",
    "            \"reference\": \"Sacramento is the capital of California.\",\n",
    "            \"output\": \"Sacramento\",\n",
    "        },\n",
    "        {\n",
    "            \"input\": \"What is the capital of California?\",\n",
    "            \"reference\": \"Carson City is the Capital of Nevada.\",\n",
    "            \"output\": \"Carson City\",\n",
    "        },\n",
    "    ]\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "With explanations, with function calling."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Explanations requested; function calling enabled when available.\n",
    "# The result is indexed as [0] and [1] in the cells below.\n",
    "evaluators = [relevance_evaluator, hallucination_evaluator]\n",
    "eval_dfs = run_evals(\n",
    "    dataframe,\n",
    "    evaluators,\n",
    "    provide_explanation=True,\n",
    "    use_function_calling_if_available=True,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# First result; presumably for relevance_evaluator, the first evaluator passed\n",
    "# to run_evals (assumes results keep the evaluator order; confirm).\n",
    "eval_dfs[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Second result; presumably for hallucination_evaluator, the second evaluator passed\n",
    "# to run_evals (assumes results keep the evaluator order; confirm).\n",
    "eval_dfs[1]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "With explanations, without function calling."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Explanations requested; function calling disabled.\n",
    "# The result is indexed as [0] and [1] in the cells below.\n",
    "evaluators = [relevance_evaluator, hallucination_evaluator]\n",
    "eval_dfs = run_evals(\n",
    "    dataframe,\n",
    "    evaluators,\n",
    "    provide_explanation=True,\n",
    "    use_function_calling_if_available=False,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# First result; presumably for relevance_evaluator, the first evaluator passed\n",
    "# to run_evals (assumes results keep the evaluator order; confirm).\n",
    "eval_dfs[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Second result; presumably for hallucination_evaluator, the second evaluator passed\n",
    "# to run_evals (assumes results keep the evaluator order; confirm).\n",
    "eval_dfs[1]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Without explanations, with function calling."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# No explanations requested; function calling enabled when available.\n",
    "# The result is indexed as [0] and [1] in the cells below.\n",
    "evaluators = [relevance_evaluator, hallucination_evaluator]\n",
    "eval_dfs = run_evals(\n",
    "    dataframe,\n",
    "    evaluators,\n",
    "    provide_explanation=False,\n",
    "    use_function_calling_if_available=True,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# First result; presumably for relevance_evaluator, the first evaluator passed\n",
    "# to run_evals (assumes results keep the evaluator order; confirm).\n",
    "eval_dfs[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Second result; presumably for hallucination_evaluator, the second evaluator passed\n",
    "# to run_evals (assumes results keep the evaluator order; confirm).\n",
    "eval_dfs[1]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Without explanations, without function calling."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# No explanations requested; function calling disabled.\n",
    "# The result is indexed as [0] and [1] in the cells below.\n",
    "evaluators = [relevance_evaluator, hallucination_evaluator]\n",
    "eval_dfs = run_evals(\n",
    "    dataframe,\n",
    "    evaluators,\n",
    "    provide_explanation=False,\n",
    "    use_function_calling_if_available=False,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# First result; presumably for relevance_evaluator, the first evaluator passed\n",
    "# to run_evals (assumes results keep the evaluator order; confirm).\n",
    "eval_dfs[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Second result; presumably for hallucination_evaluator, the second evaluator passed\n",
    "# to run_evals (assumes results keep the evaluator order; confirm).\n",
    "eval_dfs[1]"
   ]
  }
 ],
 "metadata": {
  "language_info": {
   "name": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
